# Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import folium
import requests
from branca.colormap import LinearColormap
Explore the Dataset
Demographics: Population, Race, Gender Data County
This dataset provides a detailed breakdown of demographic information for counties across the United States, derived from the U.S. Census Bureau’s 2023 American Community Survey (ACS). The data includes population counts by gender, race, and ethnicity, alongside unique identifiers for each county using State and County FIPS codes.
Dataset Features:
- County: Name of the county.
- State: Name of the state the county belongs to.
- State FIPS Code: Federal Information Processing Standard (FIPS) code for the state.
- County FIPS Code: FIPS code for the county.
- FIPS: Combined State and County FIPS codes, a unique identifier for each county.
- Total Population: Total population in the county.
- Male Population: Number of males in the county.
- Female Population: Number of females in the county.
- Total Race Responses: Total race-related responses recorded in the survey.
- White Alone: Number of individuals identifying as White alone.
- Black or African American Alone: Number of individuals identifying as Black or African American alone.
- Hispanic or Latino: Number of individuals identifying as Hispanic or Latino.
# Load the county-level demographics table; the path is relative to the current working directory.
df = pd.read_csv('demographics_data/demographic_data.csv')
df
| | County | State | State FIPS Code | County FIPS Code | FIPS | Total Population | Male Population | Female Population | Total Race Responses | White Alone | Black or African American Alone | Hispanic or Latino |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Autauga County | Alabama | 1 | 1 | 1001 | 59285 | 28669 | 30616 | 59285 | 43616 | 11829 | 2188 |
| 1 | Baldwin County | Alabama | 1 | 3 | 1003 | 239945 | 117316 | 122629 | 239945 | 198721 | 19144 | 13393 |
| 2 | Barbour County | Alabama | 1 | 5 | 1005 | 24757 | 12906 | 11851 | 24757 | 10891 | 11616 | 1490 |
| 3 | Bibb County | Alabama | 1 | 7 | 1007 | 22152 | 11824 | 10328 | 22152 | 16634 | 4587 | 744 |
| 4 | Blount County | Alabama | 1 | 9 | 1009 | 59292 | 29934 | 29358 | 59292 | 53062 | 747 | 5962 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3217 | Vega Baja Municipio | Puerto Rico | 72 | 145 | 72145 | 54058 | 25765 | 28293 | 54058 | 13681 | 2249 | 53036 |
| 3218 | Vieques Municipio | Puerto Rico | 72 | 147 | 72147 | 8147 | 4178 | 3969 | 8147 | 1028 | 222 | 7803 |
| 3219 | Villalba Municipio | Puerto Rico | 72 | 149 | 72149 | 21778 | 10510 | 11268 | 21778 | 7552 | 2219 | 21700 |
| 3220 | Yabucoa Municipio | Puerto Rico | 72 | 151 | 72151 | 29868 | 14381 | 15487 | 29868 | 2001 | 5900 | 29732 |
| 3221 | Yauco Municipio | Puerto Rico | 72 | 153 | 72153 | 33509 | 15920 | 17589 | 33509 | 24597 | 649 | 33243 |
3222 rows × 12 columns
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3222 entries, 0 to 3221
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 County 3222 non-null object
1 State 3222 non-null object
2 State FIPS Code 3222 non-null int64
3 County FIPS Code 3222 non-null int64
4 FIPS 3222 non-null int64
5 Total Population 3222 non-null int64
6 Male Population 3222 non-null int64
7 Female Population 3222 non-null int64
8 Total Race Responses 3222 non-null int64
9 White Alone 3222 non-null int64
10 Black or African American Alone 3222 non-null int64
11 Hispanic or Latino 3222 non-null int64
dtypes: int64(10), object(2)
memory usage: 302.2+ KB
df.isnull().sum()
County 0
State 0
State FIPS Code 0
County FIPS Code 0
FIPS 0
Total Population 0
Male Population 0
Female Population 0
Total Race Responses 0
White Alone 0
Black or African American Alone 0
Hispanic or Latino 0
dtype: int64
# Collapse the county rows into one row per state. The two FIPS columns keep
# the value of the first county in each group (so 'County FIPS Code' is just
# that first county's code); every population count is summed.
summed_columns = [
    'Total Population',
    'Male Population',
    'Female Population',
    'White Alone',
    'Black or African American Alone',
    'Hispanic or Latino',
]
state_aggregations = {
    'State FIPS Code': 'first',
    'County FIPS Code': 'first',
    **{column: 'sum' for column in summed_columns},
}
df_state = df.groupby('State').agg(state_aggregations).reset_index()
df_state
| | State | State FIPS Code | County FIPS Code | Total Population | Male Population | Female Population | White Alone | Black or African American Alone | Hispanic or Latino |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Alabama | 1 | 1 | 5054253 | 2453419 | 2600834 | 3303370 | 1318507 | 271640 |
| 1 | Alaska | 2 | 13 | 733971 | 385319 | 348652 | 445545 | 22774 | 52473 |
| 2 | Arizona | 4 | 1 | 7268175 | 3628694 | 3639481 | 4593653 | 336931 | 2255770 |
| 3 | Arkansas | 5 | 1 | 3032651 | 1495958 | 1536693 | 2148886 | 452127 | 265833 |
| 4 | California | 6 | 1 | 39242785 | 19605882 | 19636903 | 17248779 | 2173343 | 15630830 |
| 5 | Colorado | 8 | 1 | 5810774 | 2942568 | 2868206 | 4268784 | 232985 | 1291078 |
| 6 | Connecticut | 9 | 110 | 3598348 | 1765117 | 1833231 | 2431342 | 384753 | 640668 |
| 7 | Delaware | 10 | 1 | 1005872 | 487585 | 518287 | 621799 | 220645 | 107829 |
| 8 | District of Columbia | 11 | 1 | 672079 | 320001 | 352078 | 262549 | 290772 | 77760 |
| 9 | Florida | 12 | 1 | 21928881 | 10773620 | 11155261 | 13136701 | 3363769 | 5865737 |
| 10 | Georgia | 13 | 1 | 10822590 | 5281762 | 5540828 | 5677531 | 3391689 | 1158299 |
| 11 | Hawaii | 15 | 1 | 1445635 | 727473 | 718162 | 325356 | 27740 | 142225 |
| 12 | Idaho | 16 | 1 | 1893296 | 952080 | 941216 | 1578020 | 14108 | 252466 |
| 13 | Illinois | 17 | 1 | 12692653 | 6270399 | 6422254 | 8038512 | 1750414 | 2348118 |
| 14 | Indiana | 18 | 1 | 6811752 | 3377011 | 3434741 | 5347678 | 630680 | 569410 |
| 15 | Iowa | 19 | 1 | 3195937 | 1601453 | 1594484 | 2735263 | 123234 | 223471 |
| 16 | Kansas | 20 | 1 | 2937569 | 1473655 | 1463914 | 2289052 | 159829 | 389514 |
| 17 | Kentucky | 21 | 1 | 4510725 | 2233870 | 2276855 | 3774581 | 355237 | 212163 |
| 18 | Louisiana | 22 | 1 | 4621025 | 2262822 | 2358203 | 2678942 | 1434953 | 321022 |
| 19 | Maine | 23 | 1 | 1377400 | 678363 | 699037 | 1258122 | 23145 | 28609 |
| 20 | Maryland | 24 | 1 | 6170738 | 3002079 | 3168659 | 3060731 | 1825880 | 744272 |
| 21 | Massachusetts | 25 | 1 | 6992395 | 3416765 | 3575630 | 4945674 | 489390 | 904679 |
| 22 | Michigan | 26 | 1 | 10051595 | 4982079 | 5069516 | 7516312 | 1346689 | 576808 |
| 23 | Minnesota | 27 | 1 | 5713716 | 2862134 | 2851582 | 4476710 | 388789 | 353608 |
| 24 | Mississippi | 28 | 1 | 2951438 | 1431521 | 1519917 | 1661873 | 1090777 | 106126 |
| 25 | Missouri | 29 | 1 | 6168181 | 3040690 | 3127491 | 4831646 | 686616 | 311924 |
| 26 | Montana | 30 | 1 | 1105072 | 560035 | 545037 | 946776 | 6015 | 48519 |
| 27 | Nebraska | 31 | 1 | 1965926 | 987506 | 978420 | 1570391 | 93595 | 242226 |
| 28 | Nevada | 32 | 1 | 3141000 | 1582476 | 1558524 | 1670302 | 295802 | 917057 |
| 29 | New Hampshire | 33 | 1 | 1387834 | 692568 | 695266 | 1234149 | 21164 | 62758 |
| 30 | New Jersey | 34 | 1 | 9267014 | 4558671 | 4708343 | 5276142 | 1201053 | 2032968 |
| 31 | New Mexico | 35 | 1 | 2114768 | 1050368 | 1064400 | 1133871 | 44709 | 1018321 |
| 32 | New York | 36 | 1 | 19872319 | 9702417 | 10169902 | 11340944 | 2927008 | 3898652 |
| 33 | North Carolina | 37 | 1 | 10584340 | 5177887 | 5406453 | 6695587 | 2178329 | 1158750 |
| 34 | North Dakota | 38 | 1 | 779361 | 399126 | 380235 | 653820 | 25209 | 34963 |
| 35 | Ohio | 39 | 1 | 11780046 | 5809077 | 5970969 | 9167192 | 1446466 | 537559 |
| 36 | Oklahoma | 40 | 1 | 3995260 | 1988686 | 2006574 | 2668453 | 282536 | 490797 |
| 37 | Oregon | 41 | 1 | 4238714 | 2113849 | 2124865 | 3247656 | 81642 | 605467 |
| 38 | Pennsylvania | 42 | 1 | 12986518 | 6400912 | 6585606 | 9844085 | 1393616 | 1087732 |
| 39 | Puerto Rico | 72 | 1 | 3254885 | 1540987 | 1713898 | 1146311 | 237762 | 3215824 |
| 40 | Rhode Island | 44 | 1 | 1095371 | 537173 | 558198 | 792361 | 63862 | 187503 |
| 41 | South Carolina | 45 | 1 | 5212774 | 2537456 | 2675318 | 3339447 | 1318630 | 368900 |
| 42 | South Dakota | 46 | 3 | 899194 | 455597 | 443597 | 733035 | 20149 | 41281 |
| 43 | Tennessee | 47 | 1 | 6986082 | 3428050 | 3558032 | 5133249 | 1108897 | 496457 |
| 44 | Texas | 48 | 1 | 29640343 | 14789987 | 14850356 | 15984990 | 3626137 | 11697134 |
| 45 | Utah | 49 | 1 | 3331187 | 1686562 | 1644625 | 2688129 | 37772 | 513013 |
| 46 | Vermont | 50 | 1 | 645254 | 320321 | 324933 | 589835 | 7887 | 16058 |
| 47 | Virginia | 51 | 1 | 8657499 | 4278490 | 4379009 | 5344175 | 1623031 | 929140 |
| 48 | Washington | 53 | 1 | 7740984 | 3898212 | 3842772 | 5251386 | 306214 | 1089609 |
| 49 | West Virginia | 54 | 1 | 1784462 | 890156 | 894306 | 1622009 | 58519 | 36125 |
| 50 | Wisconsin | 55 | 1 | 5892023 | 2950540 | 2941483 | 4791680 | 361890 | 457687 |
| 51 | Wyoming | 56 | 1 | 579761 | 296646 | 283115 | 498371 | 4982 | 60581 |
df_state.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 State 52 non-null object
1 State FIPS Code 52 non-null int64
2 County FIPS Code 52 non-null int64
3 Total Population 52 non-null int64
4 Male Population 52 non-null int64
5 Female Population 52 non-null int64
6 White Alone 52 non-null int64
7 Black or African American Alone 52 non-null int64
8 Hispanic or Latino 52 non-null int64
dtypes: int64(8), object(1)
memory usage: 3.8+ KB
Population
Total Population
By State
# Map state names to two-letter postal abbreviations.
# Covers the 50 states plus the District of Columbia and Puerto Rico, so that
# Series.map(state_abbr) does not produce NaN for those two names.
# NOTE(review): whether a 'USA-states' basemap actually draws DC / PR depends on
# plotly's bundled geometry - confirm if those areas must be visible on the map.
state_abbr = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE",
    "District of Columbia": "DC",
    "Florida": "FL", "Georgia": "GA", "Hawaii": "HI", "Idaho": "ID", "Illinois": "IL",
    "Indiana": "IN", "Iowa": "IA", "Kansas": "KS", "Kentucky": "KY", "Louisiana": "LA",
    "Maine": "ME", "Maryland": "MD", "Massachusetts": "MA", "Michigan": "MI",
    "Minnesota": "MN", "Mississippi": "MS", "Missouri": "MO", "Montana": "MT",
    "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH", "New Jersey": "NJ",
    "New Mexico": "NM", "New York": "NY", "North Carolina": "NC", "North Dakota": "ND",
    "Ohio": "OH", "Oklahoma": "OK", "Oregon": "OR", "Pennsylvania": "PA",
    "Puerto Rico": "PR",
    "Rhode Island": "RI", "South Carolina": "SC", "South Dakota": "SD",
    "Tennessee": "TN", "Texas": "TX", "Utah": "UT", "Vermont": "VT",
    "Virginia": "VA", "Washington": "WA", "West Virginia": "WV", "Wisconsin": "WI",
    "Wyoming": "WY",
}
# Attach the two-letter code used as the map location key; names that are
# absent from state_abbr map to NaN.
df_state['State Abbreviation'] = df_state['State'].map(state_abbr)

# Ensure formatted columns are numeric for the heatmap color mapping
formatted_columns = [
    'Total Population',
    'Male Population',
    'Female Population',
    'White Alone',
    'Black or African American Alone',
    'Hispanic or Latino',
]
for column in formatted_columns:
    # Unparseable entries become NaN, are replaced by 0, then cast to int.
    numeric_values = pd.to_numeric(df_state[column], errors='coerce')
    df_state[column] = numeric_values.fillna(0).astype(int)

# Calculate Male and Female Population as a percentage of the Total Population
for gender in ('Male', 'Female'):
    share = df_state[f'{gender} Population'] / df_state['Total Population'] * 100
    df_state[f'{gender} Population (%)'] = share.round(2)

# Create a separate set of formatted hover data for better readability
# (thousands separators; the numeric columns stay untouched for coloring).
for column in formatted_columns:
    df_state[f"{column} (Formatted)"] = df_state[column].apply("{:,}".format)

# Customize Hover Data
for gender in ('Male', 'Female'):
    percent_column = f'{gender} Population (%)'
    df_state[f'{percent_column} (Formatted)'] = df_state[percent_column].apply("{}%".format)

# Create the heatmap
# Pre-formatted hover columns and the label each one is shown under.
hover_labels = {
    'Total Population (Formatted)': 'Population',
    'Male Population (%) (Formatted)': 'Male Population (%)',
    'Female Population (%) (Formatted)': 'Female Population (%)',
    'White Alone (Formatted)': 'White Alone',
    'Black or African American Alone (Formatted)': 'Black or African American Alone',
    'Hispanic or Latino (Formatted)': 'Hispanic or Latino',
}

# State-level map colored by the numeric 'Total Population' column; the hover
# box shows the state name followed by the formatted columns above.
fig = px.choropleth(
    df_state,
    locations='State Abbreviation',  # Using abbreviations for location mapping
    locationmode='USA-states',
    scope='usa',
    color='Total Population',
    color_continuous_scale='icefire',
    title='Heat Map of Total Population by State',
    hover_data={'State': True, **dict.fromkeys(hover_labels, True)},
    labels={'State': 'State', **hover_labels},
)

# Save a standalone HTML copy of the figure next to the notebook.
fig.write_html("chloropleth_map.html")
fig.show()
Unable to display output for mime type(s): application/vnd.plotly.v1+json
By County
# Load the counties GeoJSON file
geojson_url = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
# Fail fast on network problems instead of hanging or parsing an error page.
response = requests.get(geojson_url, timeout=30)
response.raise_for_status()
geojson_data = response.json()

# Ensure FIPS codes are strings with 5 digits so they can be compared with the
# GeoJSON feature ids used below.
df["FIPS"] = df["FIPS"].astype(str).str.zfill(5)

# Create a mapping of FIPS to Total Population
population_dict = df.set_index("FIPS")["Total Population"].to_dict()

# Normalize population for color mapping (linear between the smallest and
# largest county population)
min_pop, max_pop = df["Total Population"].min(), df["Total Population"].max()
colormap = LinearColormap(colors=["blue", "white", "red"],  # Icefire-like scale
                          vmin=min_pop, vmax=max_pop)

# Annotate every feature in a single pass: the population feeds the tooltip and
# the precomputed style feeds the layer's style_function. The dictionary lookup
# replaces a full DataFrame scan per feature.
for feature in geojson_data["features"]:
    pop = population_dict.get(feature["id"])  # None when the county is not in df
    feature["properties"]["Total Population"] = pop
    color = colormap(pop) if pop is not None else "#D3D3D3"  # grey = no data
    feature["properties"]["style"] = {"fillColor": color, "fillOpacity": 0.7, "color": "black", "weight": 0.2}

# Create a Folium map centered in the US
m = folium.Map(location=[37.8, -96], zoom_start=4, tiles="cartodb positron")

# Add interactive tooltips with County Name & Population
tooltip = folium.features.GeoJsonTooltip(
    fields=["NAME", "Total Population"],
    aliases=["County:", "Population:"],
    localize=True,
    sticky=True,
    labels=True,
    style="background-color: white; color: black; font-size: 12px; padding: 5px;"
)

# A single GeoJSON layer carries both the custom colors and the tooltips.
folium.GeoJson(
    geojson_data,
    tooltip=tooltip,
    style_function=lambda feature: feature["properties"]["style"]
).add_to(m)

# Add the custom colormap to the map as its legend
colormap.caption = "Total Population by County"
m.add_child(colormap)

# Display the map
m
Make this Notebook Trusted to load map: File -> Trust Notebook
Total Gender Distribution
# Nationwide gender totals, summed over the per-state table.
total_male_population, total_female_population = (
    df_state[column].sum() for column in ('Male Population', 'Female Population')
)
pie_data = {
    'Gender': ['Male', 'Female'],
    'Population': [total_male_population, total_female_population],
}

# Fixed color per slice so the two genders are always drawn the same way.
gender_colors = {
    'Male': '#0b6380',    # blue
    'Female': '#d18e3b',  # orange
}

# Create the pie chart
fig = px.pie(
    pie_data,
    names='Gender',
    values='Population',
    color='Gender',
    color_discrete_map=gender_colors,
    title='Total Male Population vs Female Population (USA)',
)
# Show both the percentage and the slice label on each slice.
fig.update_traces(textinfo='percent+label')
fig.show()
Unable to display output for mime type(s): application/vnd.plotly.v1+json
# Group by State and sum the male and female populations
state_gender_data = df.groupby('State').agg({
    'Male Population': 'sum',
    'Female Population': 'sum'
}).reset_index()

# Calculate the difference to determine color coding:
# positive = more women than men, negative = more men than women.
state_gender_data['Difference'] = state_gender_data['Female Population'] - state_gender_data['Male Population']

# Map full state names to abbreviations; names absent from state_abbr become NaN.
state_gender_data['State'] = state_gender_data['State'].map(state_abbr)

# Create the choropleth map
fig = px.choropleth(state_gender_data,
                    locations='State',
                    locationmode='USA-states',
                    color='Difference',
                    color_continuous_scale=px.colors.diverging.RdBu_r,
                    labels={'Difference': 'Female - Male'},
                    title='Choropleth Map of USA by Gender Population',
                    scope='usa',
                    hover_name='State',  # supplies %{hovertext} in the template below
                    hover_data={'Difference': False},  # Hide default difference data
                    custom_data=['Female Population', 'Male Population'])  # Custom data for tooltips

# Update layout
fig.update_geos(fitbounds="locations", visible=False)

# Outline the states and set a custom hovertemplate with the state, the female
# and male populations, and the difference. All three numbers are raw head
# counts, so they are shown as integers with thousands separators (the
# difference keeps its sign) rather than with a "k" suffix.
fig.update_traces(
    marker_line_color='black',
    marker_line_width=0.5,
    hovertemplate='State: %{hovertext}<br>' +
                  'Total Female: %{customdata[0]:,d}<br>' +
                  'Total Male: %{customdata[1]:,d}<br>' +
                  'Difference: %{z:+,d}<extra></extra>'
)
fig.show()
Unable to display output for mime type(s): application/vnd.plotly.v1+json
Race Distribution
# Pairwise relationships between total population and each race/ethnicity count
race_columns = ["White Alone", "Black or African American Alone", "Hispanic or Latino"]
sns.pairplot(df[["Total Population", *race_columns]])
plt.show()

# One log-log scatter plot of county totals per group: (column, y-axis label).
# Every plot gets the same figure size and a y-label that matches its column.
scatter_specs = [
    ("White Alone", "White Alone Population"),
    ("Black or African American Alone", "Black Population"),
    ("Hispanic or Latino", "Hispanic or Latino Population"),
]
for race_column, y_label in scatter_specs:
    plt.figure(figsize=(8, 6))
    sns.scatterplot(data=df, x="Total Population", y=race_column, alpha=0.5)
    plt.title(f"Total Population vs. {race_column}")
    plt.xlabel("Total Population")
    plt.ylabel(y_label)
    plt.xscale("log")
    plt.yscale("log")
    plt.show()
# Helper function to get race data for a state or county
def get_race_data(state=None, county=None, data=None):
    """Return race/ethnicity totals for the whole table, one state, or one county.

    Args:
        state: State name to keep; a falsy value means "all states".
        county: County name to keep; a falsy value means "all counties".
            County names can repeat across states, so pass ``state`` as well
            to select a single county. With ``county`` alone, every county of
            that name is summed (instead of silently matching nothing).
        data: Optional DataFrame with 'State', 'County' and the three race
            columns. Defaults to the module-level ``df``.

    Returns:
        A ``(labels, values)`` pair of equal-length lists: the three column
        names and the sum of each column over the selected rows.
    """
    filtered_df = df if data is None else data
    # Apply each filter independently so either one can be given on its own.
    if state:
        filtered_df = filtered_df[filtered_df['State'] == state]
    if county:
        filtered_df = filtered_df[filtered_df['County'] == county]

    race_totals = {
        'White Alone': filtered_df['White Alone'].sum(),
        'Black or African American Alone': filtered_df['Black or African American Alone'].sum(),
        'Hispanic or Latino': filtered_df['Hispanic or Latino'].sum(),
    }
    return list(race_totals.keys()), list(race_totals.values())


# Initialize the figure with data for the US
def _update_button(label, labels, values, title):
    """Build one dropdown entry that swaps the pie's data and the figure title."""
    return dict(
        label=label,
        method="update",
        args=[
            # Each value is wrapped in exactly one list: one entry for the
            # figure's single pie trace.
            {"labels": [labels], "values": [values]},  # Update data
            {"title": title},  # Update title
        ],
    )


initial_labels, initial_values = get_race_data()
fig = go.Figure(data=[go.Pie(labels=initial_labels, values=initial_values, name="All States")])

# Create a mapping of states to their county dropdowns
state_to_county_buttons = {}

# State dropdown, starting with the nationwide entry
state_buttons = [
    _update_button(
        "United States",
        initial_labels,
        initial_values,
        "Race per Population in the United States",
    )
]

# Build the county entries and the state entry for each state in one pass
for state in df['State'].unique():
    state_labels, state_values = get_race_data(state=state)

    county_buttons = [
        _update_button(
            "All Counties",
            state_labels,
            state_values,
            f"Race per Population in {state} (All Counties)",
        )
    ]
    counties = df[df['State'] == state]['County'].unique()
    for county in counties:
        county_labels, county_values = get_race_data(state=state, county=county)
        county_buttons.append(
            _update_button(
                county,
                county_labels,
                county_values,
                f"Race per Population in {county}, {state}",
            )
        )
    state_to_county_buttons[state] = county_buttons

    # Use the state totals directly; re-reading them from the "All Counties"
    # button would pick up its list wrapper and nest the data one level too deep.
    state_buttons.append(
        _update_button(state, state_labels, state_values, f"Race per Population in {state}")
    )

# Update layout with separate dropdowns
fig.update_layout(
    updatemenus=[
        # State dropdown
        dict(
            buttons=state_buttons,
            direction="down",
            showactive=True,
            x=0.2,  # Adjust position near the title
            xanchor="left",
            y=1.1,
            yanchor="top",
            pad={"r": 10, "t": 10},
            name="State",
        ),
        # County dropdown
        # NOTE: this menu is built once from California's counties; nothing
        # here rebuilds it when a different state is picked in the first menu.
        dict(
            buttons=state_to_county_buttons["California"],  # Default to California's counties
            direction="down",
            showactive=True,
            x=0.6,  # Adjust position near the state dropdown
            xanchor="left",
            y=1.1,
            yanchor="top",
            pad={"r": 10, "t": 10},
            name="County",
        ),
    ],
    title="Race per Population in the United States",
    title_x=0.27,  # Align title near dropdowns
)
fig.show()
Unable to display output for mime type(s): application/vnd.plotly.v1+json